/* 
do /projects/hsieh_project/proj_201809/code_0_general/p_weight.do

Functions:
p_y1y2: Generates two variables with values in year1 and year2.
p_y1y2_d: Generates the difference between year1 and year2.
p_weight: Calculates the default weight for industry level analysis.
	= (industry employment / total employment in year1 + that in year2) / 2.
*/


/*
--------------------------------------------------------------------------------
Generate two variables with values in year1 and year2

i_var: Raw variable to generate year1 year2 counterparts for.
year1 year2: Initial and terminal years
`i_var'_yy1 `i_var'_yy2: Variables with value in year1 year2 (. when year != year1/2)
`i_var'_y1 `i_var'_y2: Variables with value in year1 year2
*/
capture program drop p_y1y2
program p_y1y2
	/*
	Build year1/year2 counterparts of a variable within each ch_ind group.

	Arguments (positional):
	i_var: Name of the raw variable.
	year1: Initial year (compared against the variable `year').
	year2: Terminal year (compared against the variable `year').

	Variables created:
	`i_var'_yy1 `i_var'_yy2: `i_var' where year == year1 / year2, missing elsewhere.
	`i_var'_y1 `i_var'_y2: Within-ch_ind mean of the _yy1 / _yy2 variable,
		filled in on every row of that ch_ind group.

	Side effects: the data are left sorted by ch_ind year.
	Requires the variables ch_ind and year to exist.
	*/
	args i_var year1 year2

	/*
	Remove leftovers from a previous call one variable at a time.
	Dropping all four names in a single -drop- aborts (and drops nothing) as
	soon as one name is absent, which would leave a partial set of leftovers
	in place and make the -gen-/-egen- below fail with "already defined".
	The exact-name check avoids dropping an unrelated variable through
	variable-name abbreviation.
	*/
	local n_dropped = 0
	foreach suf in yy1 yy2 y1 y2 {
		capture confirm variable `i_var'_`suf', exact
		if _rc == 0 {
			drop `i_var'_`suf'
			local ++n_dropped
		}
	}
	if `n_dropped' > 0 {
		di "_yy _y variables exist"
	}

	// Value of the raw variable in each of the two years (missing elsewhere)
	gen `i_var'_yy1 = `i_var' if year == `year1'
	gen `i_var'_yy2 = `i_var' if year == `year2'

	// Spread each year's value across all rows of the same ch_ind
	sort ch_ind year
	by ch_ind: egen `i_var'_y1 = mean(`i_var'_yy1)
	by ch_ind: egen `i_var'_y2 = mean(`i_var'_yy2)

end


/*
--------------------------------------------------------------------------------
Calculate the difference between year2 and year1
*/
capture program drop p_y1y2_d
program p_y1y2_d
	args i_var i_perc year1 year2 i_type i_yearsuf i_drop
	/*
	Generate the year2 - year1 difference of a variable (levels or logs).

	i_var: Variable to generate difference for
	i_perc: Percentile tag for top-firm variables ("" = not related to top firms);
		any "." is stripped before it is used in names (e.g. 0.1 -> 01)
	year1: Initial year
	year2: Terminal year
	i_type: "ln" = difference of ln(`i_var'); anything else = difference in levels
	i_yearsuf: "0" = do not append _`year1'_`year2' to the new variable name
	i_drop: "0" = keep the _yy* / _y* helper variables made by p_y1y2

	New variable name:
	with i_perc:    [ln_]`i_var'_d_<perc>[_`year1'_`year2']
	without i_perc: [ln_]`i_var'_d[_`year1'_`year2']
	*/

	// Optional year suffix for the output variable name
	local suffix ""
	if "`i_yearsuf'" != "0" local suffix "_`year1'_`year2'"

	// Work out the source variable stem and the output name once,
	// instead of repeating the logic for each case.
	if "`i_perc'" == "" {
		local src "`i_var'"
		local dst "`i_var'_d`suffix'"
	}
	else {
		local pct = subinstr("`i_perc'", ".", "", .) // e.g.: 0.1 -> 01
		local src "`i_var'_`pct'"
		local dst "`i_var'_d_`pct'`suffix'"
	}

	// Build `src'_y1 / `src'_y2 (plus the _yy1 / _yy2 intermediates)
	p_y1y2 "`src'" "`year1'" "`year2'"

	// Terminal minus initial, in logs or in levels
	if "`i_type'" == "ln" {
		gen ln_`dst' =  ln(`src'_y2) - ln(`src'_y1)
	}
	else {
		gen `dst' =  `src'_y2 - `src'_y1
	}

	// Clean up the helper variables unless asked to keep them
	if "`i_drop'" != "0" drop `src'_yy1 `src'_yy2 `src'_y1 `src'_y2

end


/*
--------------------------------------------------------------------------------
Calculate default weight for industry level analysis
The weight is only calculated for industries that do not have any missing value in the given percentile
*/
capture program drop p_weight
program p_weight
	args year1 year2 l_perc c_emps_ind
	/*
	Calculate the default industry weight for each percentile in l_perc:
	the average of an industry's employment share in year1 and in year2,
	where shares are taken over the industries that are present in both
	years and have the percentile available in both years.

	year1: Initial year
	year2: Terminal year
	l_perc: List of percentile (defaults to "10" when empty)
	c_emps_ind: "0" = do not calculate employment share of top firms

	Variables read: year, ch_ind, emp_ind, miss_<perc>, and (unless
	c_emps_ind is "0") emps_ind_<perc> and ln_emps_ind_<perc>.
	<perc> is the percentile with any "." removed (e.g. 0.1 -> 01).
	*/

	if "`l_perc'" == "" local l_perc "10" // Default list of perc if not supplied.
	di "`year1' `year2' `l_perc'"

	//local year1 = 1977
	//local year2 = 2013

	// Mark if ind in both years: ind_year counts the rows of each ch_ind
	// that fall in year1 or year2.
	qui {
	gen in_year = .
	replace in_year = 1 if year ==`year1'
	replace  in_year = 1 if year ==`year2'
	sort ch_ind year
	by ch_ind: egen ind_year = total(in_year)
	drop in_year
	}
	di "Industries missing in any year"
	levelsof ch_ind if ind_year != 2
	di _n(2)

	// Calculate Default Weight
	foreach i_perc in `l_perc' {

		di "Now `i_perc'"
		local v_perc = subinstr("`i_perc'", ".", "", .) // e.g.: 0.1 -> 01

		//local v_perc "10"
		//local year1 "1977"
		//local year2 "2013"

		sort ch_ind year

		// Mark if miss perc in any year: ind_year_<perc> counts the rows of
		// each ch_ind in year1/year2 where miss_<perc> is not 1.
		qui {
		gen in_year_`v_perc' = .
		replace in_year_`v_perc' = 1 if year ==`year1' & miss_`v_perc' != 1
		replace in_year_`v_perc' = 1 if year ==`year2' & miss_`v_perc' != 1
		by ch_ind: egen ind_year_`v_perc' = total(in_year_`v_perc')
		drop in_year_`v_perc'
		}
		di "Industries with missing perc in any year"
		levelsof ch_ind if ind_year_`v_perc' != 2

		// Include those in both years and with no missing perc
		gen ind_in_`v_perc' = 1 if ind_year == 2 & ind_year_`v_perc' == 2

		// Generate total employment for included industries.
		// The indicator is referenced by its full name: the bare name "ind_in"
		// only resolves through variable abbreviation and becomes ambiguous
		// as soon as another ind_in_* variable exists (a second percentile in
		// l_perc, or renamed output of an earlier call).
		sort year ch_ind
		by year: egen emp_aggin_`v_perc' = total(emp_ind) if ind_in_`v_perc' == 1 // Industries with missing percentile in either year are excluded

		// Employment share of each industry out of included industries
		gen emps_aggin_`v_perc' = emp_ind / emp_aggin_`v_perc'  // This can be used as weight
		gen ln_emps_aggin_`v_perc' = ln(emps_aggin_`v_perc')

		p_y1y2 "emps_aggin_`v_perc'" "`year1'" "`year2'"
		// Stems whose p_y1y2 helper variables must be removed at the end
		local tmp_stems "emps_aggin_`v_perc'"

		// Default Weight: (Employment share in year1 + that in year2) / 2
		gen w_emps_`v_perc' = (emps_aggin_`v_perc'_y1 + emps_aggin_`v_perc'_y2) / 2
		qui sum w_emps_`v_perc' if year == `year1'
		di "Sum of weight: `r(sum)'" // Should be very close to 1
		//gen w_empsa_`v_perc' = w_emps_`v_perc' / r(sum)
		levelsof ch_ind if w_emps_`v_perc' == .


		if "`c_emps_ind'" != "0" {
		// Calculate change in employment share of top firms (we may opt to not calculate this)
		p_y1y2 "emps_ind_`v_perc'" "`year1'" "`year2'"
		gen emps_ind_d_`v_perc'_`year1'_`year2' =  emps_ind_`v_perc'_y2 - emps_ind_`v_perc'_y1
		p_y1y2 "ln_emps_ind_`v_perc'" "`year1'" "`year2'"
		gen ln_emps_ind_d_`v_perc'_`year1'_`year2' =  ln_emps_ind_`v_perc'_y2 - ln_emps_ind_`v_perc'_y1
		local tmp_stems "`tmp_stems' emps_ind_`v_perc' ln_emps_ind_`v_perc'"
		}

		// Drop only the helper variables created above. A wildcard drop
		// (*_y1 etc.) would also delete unrelated variables in the dataset
		// that happen to end in _yy1/_yy2/_y1/_y2.
		foreach stem of local tmp_stems {
			drop `stem'_yy1 `stem'_yy2 `stem'_y1 `stem'_y2
		}

		// Tag the outputs with the year pair so several year pairs can coexist
		rename ind_year_`v_perc' ind_year_`v_perc'_`year1'_`year2'
		rename ind_in_`v_perc' ind_in_`v_perc'_`year1'_`year2'
		rename emp_aggin_`v_perc' emp_aggin_`v_perc'_`year1'_`year2'
		rename emps_aggin_`v_perc' emps_aggin_`v_perc'_`year1'_`year2'
		rename ln_emps_aggin_`v_perc' ln_emps_aggin_`v_perc'_`year1'_`year2'
		rename w_emps_`v_perc' w_emps_`v_perc'_`year1'_`year2'

	}

	rename ind_year ind_year_`year1'_`year2'

/*
Some reference for variable names (each is stored with a _`year1'_`year2' suffix):
ind_year: # of years industry is in
ind_year_`v_perc': # of years industry-percentile is available
ind_in_`v_perc': Indicator for industries that are in both years and have the specified percentile in both years.
emp_aggin_`v_perc': Total employment of the subset of industries specified above.
emps_aggin_`v_perc' ln_emps_aggin_`v_perc': Corresponding share variables.
w_emps_`v_perc': Default weight.
*/
end

// End of do file
